1.1.1 Study introduction¶
The City of Buenos Aires published the vaccination statistics for the first quarter of 2021. The data covers everyone in the city area who received a COVID-19 vaccine between 14JAN2021 and 14APR2021, in public and private institutions.
The variables created for the analysis of this view are the following:
Standard Variables:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from matplotlib.cm import get_cmap
from matplotlib import cm
import plotly.io as pio
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Use plotly's 'notebook' renderer as the default for fig.show().
pio.renderers.default = 'notebook'
import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')
import math
# Raise pandas' display limits to 1000 rows / 1000 columns.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)
# Load the vaccination dataset from a CSV file given by relative path.
df = pd.read_csv('dataset_total_vacunas.csv')
# Preview the first rows of the loaded frame.
df.head()
| FECHA_ADMINISTRACION | GRUPO_ETARIO | GENERO | VACUNA | TIPO_EFECTOR | DOSIS_1 | DOSIS_2 | ID_CARGA | DOSIS_3 | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 14JAN2021:00:00:00 | 41 a 50 | M | Sputnik | Público | 138 | 0 | NaN | 0.0 |
| 1 | 14JAN2021:00:00:00 | 41 a 50 | M | Sputnik | Público nacional | 28 | 0 | NaN | 0.0 |
| 2 | 14JAN2021:00:00:00 | 51 a 60 | F | Sputnik | Privado | 54 | 0 | NaN | 0.0 |
| 3 | 14JAN2021:00:00:00 | 51 a 60 | F | Sputnik | Público | 173 | 0 | NaN | 0.0 |
| 4 | 14JAN2021:00:00:00 | 51 a 60 | F | Sputnik | Público nacional | 27 | 0 | NaN | 0.0 |
# Caption counters: table_count is interpolated into the table captions;
# fig_count is initialised alongside it.
table_count, fig_count = 1, 1
Across 38,531 records, the data is divided into age ranges, starting at 30 years or younger and going up in 10-year bands (31 to 40, 41 to 50, and so on) to 91 or older.
# Count the non-null VACUNA entries in each age group and show the result as
# a captioned table.
# NOTE(review): this counts dataset rows; each row appears to carry aggregated
# dose counts (DOSIS_1..3), so the figure may not equal "people" — confirm.
df_resumen = (
    df.groupby('GRUPO_ETARIO')['VACUNA']
    .count()
    .reset_index()
    .rename(columns={'VACUNA': 'Q - Vacunas'})
)
caption = f'Table{table_count}. Number of People percibing Vaccination'
display(df_resumen.style.set_caption(caption))
# Echo the current table counter as the cell output.
table_count
| GRUPO_ETARIO | Q - Vacunas | |
|---|---|---|
| 0 | 30 o menos | 6244 |
| 1 | 31 a 40 | 5443 |
| 2 | 41 a 50 | 5248 |
| 3 | 51 a 60 | 5182 |
| 4 | 61 a 70 | 4959 |
| 5 | 71 a 80 | 4583 |
| 6 | 81 a 90 | 3889 |
| 7 | 91 o mas | 2983 |
1
# Figure 1: pie chart of the per-age-group counts held in df_resumen.
pie_layout = dict(
    width=900,
    height=600,
    title='Figure 1. Category Age Distribution',
)
fig = px.pie(
    df_resumen,
    names='GRUPO_ETARIO',
    values='Q - Vacunas',
    color_discrete_sequence=px.colors.sequential.Blues_r,
)
fig.update_layout(**pie_layout)
fig.show()
Figure 1 shows the percentage of people vaccinated during the period of analysis, grouped by age range.
# Figure 2: stacked bars of first/second/third dose columns per vaccine brand.
# NOTE(review): df_res is not defined in the visible part of this notebook;
# presumably a per-VACUNA summary with the three "Q ... Dosis" columns built
# in another cell — confirm before running from a clean kernel.
d = ['Q 1er Dosis', 'Q 2da Dosis', 'Q 3ra Dosis']
g = df_res
fig = px.bar(
    g,
    x='VACUNA',
    y=d,
    color_discrete_sequence=px.colors.sequential.Blues_r,
)
fig.update_layout(
    title='Figure 2. Dosis distribution by Vaccine Brand',
    xaxis_title='',
    yaxis_title='Applied Dosis',
    width=800,
    height=600,
)
# Order brands from the largest total to the smallest.
fig.update_xaxes(categoryorder='total descending')
fig.show()
# df_dos is a second name for df (no copy is made), so the columns assigned
# below are visible through both names.
df_dos = df

# Collapse the long product descriptions into short brand labels.
vaccine_labels = {
    'vacuna Coronavirus (CANSINO), vial x 1 dosis': 'CANSINO',
    'vacuna Coronavirus (PFIZER) 1ra - 2da dosis, vial x 6 dosis (0.3ml)': 'PFIZER',
    'vacuna Coronavirus (PFIZER) pediátrica, vial x 10 dosis (0.2ml)': 'PFIZER (Pediatrica)',
}
df_dos['VACUNA'] = df['VACUNA'].replace(vaccine_labels)

# Row-wise total of the three dose columns, computed as floats.
dose_columns = ['DOSIS_1', 'DOSIS_2', 'DOSIS_3']
df_dos['Total_dosis'] = df[dose_columns].astype(float).sum(axis=1)
df_dos
| FECHA_ADMINISTRACION | GRUPO_ETARIO | GENERO | VACUNA | TIPO_EFECTOR | DOSIS_1 | DOSIS_2 | ID_CARGA | DOSIS_3 | Total_dosis | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 14JAN2021:00:00:00 | 41 a 50 | M | Sputnik | Público | 138 | 0 | NaN | 0.0 | 138.0 |
| 1 | 14JAN2021:00:00:00 | 41 a 50 | M | Sputnik | Público nacional | 28 | 0 | NaN | 0.0 | 28.0 |
| 2 | 14JAN2021:00:00:00 | 51 a 60 | F | Sputnik | Privado | 54 | 0 | NaN | 0.0 | 54.0 |
| 3 | 14JAN2021:00:00:00 | 51 a 60 | F | Sputnik | Público | 173 | 0 | NaN | 0.0 | 173.0 |
| 4 | 14JAN2021:00:00:00 | 51 a 60 | F | Sputnik | Público nacional | 27 | 0 | NaN | 0.0 | 27.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 38526 | 03MAR2022:00:00:00 | 91 o mas | M | PFIZER | Público | 0 | 2 | NaN | 3.0 | 5.0 |
| 38527 | 14APR2022:00:00:00 | 30 o menos | F | Moderna | Público | 3 | 5 | NaN | 46.0 | 54.0 |
| 38528 | 14APR2022:00:00:00 | 30 o menos | F | Sinopharm | Público | 35 | 60 | NaN | 5.0 | 100.0 |
| 38529 | 14APR2022:00:00:00 | 30 o menos | F | Sputnik | Público | 0 | 6 | NaN | 5.0 | 11.0 |
| 38530 | 14APR2022:00:00:00 | 30 o menos | F | PFIZER | Público | 13 | 20 | NaN | 125.0 | 158.0 |
38531 rows × 10 columns
# Figure 3: total doses per vaccine brand, split by gender (horizontal bars).
g = df_dos.groupby(['VACUNA', 'GENERO'])['Total_dosis'].sum().reset_index()
fig = px.bar(
    g,
    x='Total_dosis',
    y='VACUNA',
    color='GENERO',
    text='Total_dosis',
    color_discrete_sequence=px.colors.sequential.Blues_r,
)
fig.update_layout(
    title='Figure 3. Vaccine Brand by Gender - Sum of 3 dosis',
    xaxis_title='',
    yaxis_title='Vaccine Brand',
    width=1000,
    height=800,
)
# Sort the brand axis by each brand's total.
fig.update_yaxes(categoryorder='total ascending')
fig.show()
# Figure 4: total doses per vaccine brand, split by facility type (TIPO_EFECTOR).
d = df_dos.groupby(['VACUNA', 'TIPO_EFECTOR'])['Total_dosis'].sum().reset_index()
fig = px.bar(
    d,
    x='VACUNA',
    y='Total_dosis',
    color='TIPO_EFECTOR',
    text='Total_dosis',
    color_discrete_sequence=px.colors.sequential.Blues_r,
)
fig.update_layout(
    title='Figure 4. Vaccine Brand by efector kind - Sum of 3 dosis',
    xaxis_title='Vaccine Brand',
    yaxis_title='Q of Vacciones',
    width=1000,
    height=800,
)
# Sort the brand axis by each brand's total.
fig.update_xaxes(categoryorder='total ascending')
fig.show()
# Total doses applied per facility type (TIPO_EFECTOR), shown as a captioned
# table. The grouping column is renamed to 'Tipo Organismo' for display and
# for the pie chart that consumes this frame.
df_resumen_dos = (
    df_dos.groupby('TIPO_EFECTOR')['Total_dosis']
    .sum()
    .reset_index()
    .rename(columns={'TIPO_EFECTOR': 'Tipo Organismo'})
)
# Advance the counter so this table does not reuse the number of the earlier
# table (table_count was never incremented after it). Re-running this cell on
# its own advances the number again.
table_count += 1
# The caption describes what the table holds: summed doses per facility type,
# not a head-count of vaccinated people.
display(df_resumen_dos.style.set_caption(f'Table{table_count}. Total doses applied by facility type'))
# Echo the current table counter as the cell output.
table_count
| Tipo Organismo | Total_dosis | |
|---|---|---|
| 0 | Privado | 177318.000000 |
| 1 | Público | 7736928.000000 |
| 2 | Público nacional | 21701.000000 |
1
# Figure 5: share of total doses by facility type.
facility_layout = dict(
    width=900,
    height=600,
    title='Figure 5. Sort of Facility (Pirvate - Public)',
)
fig = px.pie(
    df_resumen_dos,
    names='Tipo Organismo',
    values='Total_dosis',
    color_discrete_sequence=px.colors.sequential.Blues_r,
)
fig.update_layout(**facility_layout)
fig.show()
As the pie chart shows, 97.5% of the inoculations were given at public facilities and only 2.23% at private ones; the remaining 0.273% is recorded as 'Público nacional', which covers facilities that are public and co-owned by the Buenos Aires city government and the national government.
# Parse timestamps of the form '14JAN2021:00:00:00' into datetimes.
df_dos['Fecha'] = pd.to_datetime(df_dos['FECHA_ADMINISTRACION'], format='%d%b%Y:%H:%M:%S')

# Month number -> Spanish three-letter abbreviation.
month_labels = {
    1: 'Ene', 2: 'Feb', 3: 'Mar', 4: 'Abr', 5: 'May', 6: 'Jun',
    7: 'Jul', 8: 'Ago', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dic',
}
# Assign the labelled column directly. Calling replace(inplace=True) on
# df_dos['Mes'] operates on an intermediate Series and is not guaranteed to
# write back into df_dos (it does not under pandas Copy-on-Write).
df_dos['Mes'] = df_dos['Fecha'].dt.month.map(month_labels)
df_dos
| FECHA_ADMINISTRACION | GRUPO_ETARIO | GENERO | VACUNA | TIPO_EFECTOR | DOSIS_1 | DOSIS_2 | ID_CARGA | DOSIS_3 | Total_dosis | Fecha | Mes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 14JAN2021:00:00:00 | 41 a 50 | M | Sputnik | Público | 138 | 0 | NaN | 0.0 | 138.0 | 2021-01-14 | Ene |
| 1 | 14JAN2021:00:00:00 | 41 a 50 | M | Sputnik | Público nacional | 28 | 0 | NaN | 0.0 | 28.0 | 2021-01-14 | Ene |
| 2 | 14JAN2021:00:00:00 | 51 a 60 | F | Sputnik | Privado | 54 | 0 | NaN | 0.0 | 54.0 | 2021-01-14 | Ene |
| 3 | 14JAN2021:00:00:00 | 51 a 60 | F | Sputnik | Público | 173 | 0 | NaN | 0.0 | 173.0 | 2021-01-14 | Ene |
| 4 | 14JAN2021:00:00:00 | 51 a 60 | F | Sputnik | Público nacional | 27 | 0 | NaN | 0.0 | 27.0 | 2021-01-14 | Ene |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 38526 | 03MAR2022:00:00:00 | 91 o mas | M | PFIZER | Público | 0 | 2 | NaN | 3.0 | 5.0 | 2022-03-03 | Mar |
| 38527 | 14APR2022:00:00:00 | 30 o menos | F | Moderna | Público | 3 | 5 | NaN | 46.0 | 54.0 | 2022-04-14 | Abr |
| 38528 | 14APR2022:00:00:00 | 30 o menos | F | Sinopharm | Público | 35 | 60 | NaN | 5.0 | 100.0 | 2022-04-14 | Abr |
| 38529 | 14APR2022:00:00:00 | 30 o menos | F | Sputnik | Público | 0 | 6 | NaN | 5.0 | 11.0 | 2022-04-14 | Abr |
| 38530 | 14APR2022:00:00:00 | 30 o menos | F | PFIZER | Público | 13 | 20 | NaN | 125.0 | 158.0 | 2022-04-14 | Abr |
38531 rows × 12 columns
# Figure 6: doses applied per calendar month, split by vaccine brand.
# 'Mes' holds only the month label, so the same month of different years is
# pooled into one bar.
month_order = ['Ene', 'Feb', 'Mar', 'Abr', 'May', 'Jun',
               'Jul', 'Ago', 'Sep', 'Oct', 'Nov', 'Dic']
# Sum the doses per (month, brand). Grouping by Total_dosis and taking max()
# of the month labels would pick the alphabetically last label, not a date.
g = df_dos.groupby(['Mes', 'VACUNA'])['Total_dosis'].sum().reset_index()
fig = px.histogram(
    g,
    x='Mes',
    y='Total_dosis',
    color='VACUNA',
    histfunc='sum',
    # Show months in calendar order rather than order of appearance.
    category_orders={'Mes': month_order},
    color_discrete_sequence=px.colors.sequential.Blues_r,
)
fig.update_layout(
    # The x axis is the month and the bars are dose totals.
    xaxis_title="Month",
    yaxis_title="Applied doses",
    title="Figure 6. Distribution of Vaccines During the year",
    width=900,
    height=600,
)
fig.show()